library(plyr)
library(tidyverse)
library(readstata13)

# Seed the RNG; the jittered points in the scatter plots below draw from it
set.seed(10123)

# Everything below reads and writes relative to the output directory
setwd('output')

data <- read.dta13('../data/combined.dta')

# Replace the raw universe codes with the study labels used for faceting.
# Codes that are not listed here become NA.
data <- data %>%
  mutate(universe = unname(c(
    imm_canvass = 'Study 1',
    imm_phone = 'Study 2',
    partisan_canvass = 'Study 3'
  )[as.character(universe)]))

# before/after graphs: endline plotted against baseline, one panel per study,
# with the 45-degree line marking "no change"

# Figure 1a: ratings of Trump voters
g <- ggplot(data, aes(x = therm_trump_voters0, y = therm_trump_voters1)) +
  geom_point(position = 'jitter', alpha = .25) +
  geom_abline(slope = 1, intercept = 0) +
  facet_wrap(~ universe) +
  theme_minimal() +
  labs(x = 'Baseline Ratings',
       y = 'Endline Ratings',
       title = 'Feeling Thermometer Ratings: Trump Voters')
ggsave(filename = 'fig1a_trump_voters_points.png', plot = g,
       width = 7.5, height = 3.75)

# Figure 1b: ratings of Republican voters
g <- ggplot(data, aes(x = therm_gop_voters0, y = therm_gop_voters1)) +
  geom_point(position = 'jitter', alpha = .25) +
  geom_abline(slope = 1, intercept = 0) +
  facet_wrap(~ universe) +
  theme_minimal() +
  labs(x = 'Baseline Ratings',
       y = 'Endline Ratings',
       title = 'Feeling Thermometer Ratings: Republican Voters')
ggsave(filename = 'fig1b_gop_voters_points.png', plot = g,
       width = 7.5, height = 3.75)

# Summarise the paired change between two aligned measurements.
#
# Arguments:
#   before  numeric vector of baseline values
#   after   numeric vector of endline values, matched element-wise to `before`
#
# Pairs in which either value is NA are dropped before anything is computed.
#
# Returns a named numeric vector with three elements:
#   `mean.diff.mean of the differences`  mean of (after - before), taken from
#                                        a paired t-test
#   se                                   standard error reported by that test
#   median.diff                          median of (after - before)
sum.dif <- function(before, after) {
  all.present <- !is.na(before) & !is.na(after)
  before <- before[all.present]
  after <- after[all.present]
  t.t <- t.test(after, before, paired = TRUE)
  # The first element's name used to be derived from the label t.test() puts
  # on its estimate. The rest of this script refers to that element by the
  # fixed name below, so set it explicitly instead of inheriting the label.
  # NOTE(review): newer R releases appear to label the paired estimate
  # differently ('mean difference') -- confirm against the R NEWS.
  c('mean.diff.mean of the differences' = unname(t.t$estimate),
    se = t.t$stderr,
    median.diff = median(after - before))
}

# Paired before/after summaries of the four outcomes for one study.
#
# Arguments:
#   study  a study number (matched against universe 'Study <study>'), or
#          'All' to use every row of the global `data`
#
# Returns a matrix with one row per outcome (tvs, gops, cands, parties). Each
# row holds the output of sum.dif() plus `study` and `dv` labels; because the
# labels are strings, the whole matrix is character.
sum.dif.one.study <- function(study) {
  if (study == 'All') {
    df <- data
  } else {
    df <- data %>% filter(universe == paste0('Study ', study))
    # From here on `study` is the display label rather than the bare number
    study <- paste0('Study ', study)
  }

  outcome.rows <- list(
    tvs = c(
      sum.dif(df$therm_trump_voters0, df$therm_trump_voters1),
      study = study,
      dv = 'Ratings of Trump Voters'
    ),
    gops = c(
      sum.dif(df$therm_gop_voters0, df$therm_gop_voters1),
      study = study,
      dv = 'Ratings of Republican Voters'
    ),
    cands = c(
      sum.dif(df$therm_trump_voters_diff0, df$therm_trump_voters_diff1),
      study = study,
      dv = 'Affective Polarization: Difference in Ratings of Trump Supporters - Trump Opponents'
    ),
    parties = c(
      sum.dif(df$therm_partisan_voters_diff0, df$therm_partisan_voters_diff1),
      study = study,
      dv = 'Affective Polarization: Difference in Ratings of Republican - Democratic Voters'
    )
  )

  # Stack the named rows; the list names become the row names
  do.call(rbind, outcome.rows)
}

# One block of outcome summaries per study, plus the pooled 'All' sample
results.by.study <- adply(c('1', '2', '3', 'All'), 1, sum.dif.one.study,
                          .id = NULL)

# Figure 2: mean change per study and outcome, drawn with a thick +/- 1 SE bar
# and a thin +/- 1.96 SE bar
g <- results.by.study %>%
  mutate(
    # The summaries come back as text, so convert before doing arithmetic
    est = as.numeric(`mean.diff.mean of the differences`),
    se = as.numeric(se),
    study = factor(study,
                   levels = c('All', 'Study 3', 'Study 2', 'Study 1'),
                   ordered = TRUE),
    # Wrap the long outcome names for the facet strips, then keep the facets
    # in order of first appearance
    dv = str_wrap(dv, width = 33),
    dv = factor(dv, levels = unique(dv), ordered = TRUE)
  ) %>%
  ggplot(aes(x = est, y = study)) +
  geom_point() +
  geom_linerange(aes(xmax = est + se, xmin = est - se), size = 1) +
  geom_linerange(aes(xmax = est + 1.96*se, xmin = est - 1.96*se), size = .5) +
  facet_wrap(~dv, scales = 'free') +
  expand_limits(x = 0) +
  geom_vline(xintercept = 0) +
  theme_minimal() +
  labs(x = 'Average Change in Feeling Thermometer Ratings, Endline - Baseline',
       y = 'Study')
ggsave(filename = 'fig2_all_coefs.png', plot = g, width = 7.5, height = 5)


# by open-ended response code
# From here on `data` holds the contents of study3_with_open_ends.csv,
# replacing the combined data set loaded earlier.
data <- read_csv(file = '../data/study3_with_open_ends.csv')

# Paired before/after summaries of two outcomes for one open-ended code.
#
# Arguments:
#   subgroup  a value of `open_end_coding1`; only rows of the global `data`
#             with that value are used
#
# Returns a two-row matrix (tvs, gops). Each row holds the output of sum.dif()
# plus `subgroup` and `dv` labels; because the labels are strings, the whole
# matrix is character.
sum.diff.one.subgroup <- function(subgroup) {
  df <- data %>% filter(open_end_coding1 == subgroup)

  outcome.rows <- list(
    tvs = c(
      sum.dif(df$therm_trump_voters0, df$therm_trump_voters1),
      subgroup = subgroup,
      dv = 'Ratings of Trump Voters'
    ),
    gops = c(
      sum.dif(df$therm_rep_voters0, df$therm_rep_voters1),
      subgroup = subgroup,
      dv = 'Ratings of Republican Voters'
    )
  )

  # Stack the named rows; the list names become the row names
  do.call(rbind, outcome.rows)
}

# Collapse every code that mentions 'Other' into a single category
data <- data %>%
  mutate(
    open_end_coding1 = ifelse(
      grepl('Other', open_end_coding1),
      'Other (Program or Personal Skills)',
      open_end_coding1
    )
  )

# One block of outcome summaries per open-ended code
results.by.code <- adply(unique(data$open_end_coding1), 1,
                         sum.diff.one.subgroup, .id = NULL)

# Order the codes by the sum of their two mean changes (ascending); this
# ordering is used for the y axis of the figure
subgroups.in.order <- results.by.code %>%
  select(-se, -median.diff) %>%
  pivot_wider(
    names_from = dv,
    values_from = `mean.diff.mean of the differences`
  ) %>%
  mutate(
    sum = as.numeric(`Ratings of Trump Voters`) +
      as.numeric(`Ratings of Republican Voters`)
  ) %>%
  arrange(sum) %>%
  pull(subgroup)

# Mean change per code and outcome, drawn with a thick +/- 1 SE bar and a thin
# +/- 1.96 SE bar
g <- results.by.code %>%
  mutate(
    # The summaries come back as text, so convert before doing arithmetic
    est = as.numeric(`mean.diff.mean of the differences`),
    se = as.numeric(se),
    subgroup = factor(subgroup, levels = subgroups.in.order, ordered = TRUE),
    # Wrap the outcome names for the facet strips, then keep the facets in
    # order of first appearance
    dv = str_wrap(dv, width = 33),
    dv = factor(dv, levels = unique(dv), ordered = TRUE)
  ) %>%
  ggplot(aes(x = est, y = subgroup)) +
  geom_point() +
  geom_linerange(aes(xmax = est + se, xmin = est - se), size = 1) +
  geom_linerange(aes(xmax = est + 1.96*se, xmin = est - 1.96*se), size = .5) +
  facet_wrap(~dv, scales = 'free') +
  expand_limits(x = 0) +
  geom_vline(xintercept = 0) +
  theme_minimal() +
  labs(x = 'Average Change in Feeling Thermometer Ratings, Endline - Baseline',
       y = 'Open Ended Response')
ggsave(filename = 'by_open_end.pdf', plot = g, width = 7.5, height = 2.5)

